# Allows launching the browser
from selenium import webdriver
# pandas, used to tabulate and export the scraped data
import pandas as pd
# Allows searching for elements by a given locator strategy
from selenium.webdriver.common.by import By
# Allows waiting for the page to load
from selenium.webdriver.support.ui import WebDriverWait
# Conditions for determining whether the page has loaded
from selenium.webdriver.support import expected_conditions as EC
# Handles the timeout case
from selenium.common.exceptions import TimeoutException


# Configure Chrome *before* launching it so the flags below take effect.
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument("--test-type")

# Start exactly one browser session (a session that gets replaced without
# quit() would leave a stray Chrome process behind). No driver path is
# passed, so Selenium locates chromedriver itself (e.g. via PATH).
# `options=` is the supported keyword; `chrome_options=` is deprecated.
driver = webdriver.Chrome(options=options)

# Maps project name -> project URL.
project_list = {}
try:
    driver.get("https://github.com/collections/machine-learning")
    # Collect every project heading on the collection page.
    projects = driver.find_elements(By.XPATH, "//h1[@class='h3 lh-condensed']")

    # Extract the details of each project.
    for proj in projects:
        proj_name = proj.text  # project name
        # Project URL: href of the first <a> directly under the heading.
        proj_url = proj.find_elements(By.XPATH, "a")[0].get_attribute('href')
        project_list[proj_name] = proj_url
finally:
    # Always close the browser, even if scraping fails part-way through.
    driver.quit()

# Export the data.
# Build the frame directly from the name -> URL mapping with explicit column
# names. Keys and values of a dict iterate in the same order, so each row
# pairs a project with its own URL. Naming the columns up front also keeps
# an empty `project_list` from failing: it simply yields an empty table.
project_df = pd.DataFrame({
    'project_url': list(project_list.values()),
    'project_name': list(project_list.keys()),
})
# The frame has a default 0..n-1 index, which is written as the first CSV
# column; `project_name` must keep the real names, not a copy of that index.
project_df.to_csv('project_list.csv')